
*****************************************************

* Load the 1850 extract, discarding whatever is currently in memory.
clear
use "1850_full.dta"

* Show the distribution of gq, then remove records coded 3 or 4
* (presumably the IPUMS group-quarters categories -- confirm against the codebook).
tabulate gq
drop if inlist(gq, 3, 4)

* Largest value across the two parents for each measure.  max() skips missing
* arguments, so the result is missing only when both parents' values are missing.
local vars occscore realprop
foreach x of local vars{
gen max`x'parent = max(`x'_mom, `x'_pop)
}

* Sex and race indicators; left missing when the source variable is missing.
gen male1850 = sex==1 if !missing(sex)

gen white1850 = race==1 if !missing(race)

gen black1850 = race==2 if !missing(race)

* Occupation code of the parent holding the maximum occscore (the mother's
* code wins a tie because her assignment runs last).
* In Stata "missing == missing" evaluates to true, so every comparison against
* maxoccscoreparent is guarded: without the guard a child with no parental
* occscore would be flagged as if both parents held the maximum.
gen occmaxoccscore = occ1950_pop if occscore_pop==maxoccscoreparent & !missing(maxoccscoreparent)
replace occmaxoccscore = occ1950_mom if occscore_mom==maxoccscoreparent & !missing(maxoccscoreparent)

* 1 if the father's (resp. mother's) occscore equals the parental maximum,
* 0 if it does not, missing when neither parent has an occscore.
gen dadmaxoccscore = occscore_pop==maxoccscoreparent if !missing(maxoccscoreparent)

gen mommaxoccscore = occscore_mom == maxoccscoreparent if !missing(maxoccscoreparent)

* Combined parental real estate: sum of father's and mother's values.  With the
* "missing" option the total is missing only when both components are missing.
egen realprop_parents = rowtotal(realprop_pop realprop_mom), missing
gen nonmissingrealpropparents = realprop_parents~=.

*Fraction with non-missing parental real estate (used as the fraction living with parents), by age
* These must run BEFORE the records with missing parental real estate are
* dropped; afterwards the indicator is 1 for every remaining record and the
* mean is trivially 1 at every age.
mean nonmissingrealpropparents if male1850==1 & white1850==1, over(age)
mean nonmissingrealpropparents if male1850==0 & white1850==1, over(age)

* From here on the sample contains only records with parental real estate.
drop if realprop_parents==.

keep histid realprop_parents nonmissingrealpropparents age gq sex male1850 white1850 black1850 relate related imprel maxoccscoreparent race stateicp countyicp occmaxoccscore dadmaxoccscore mommaxoccscore occ1950_mom occ1950_pop maxrealpropparent school occ1950 occscore perwt realprop_pop realprop_mom

*Get the parental real estate distribution nationally (White 16-18 year-olds)
tab realprop_parents if age>=16 & age<=18 & white1850==1


*Append the census year (1850) to each listed variable name
foreach v of varlist histid realprop_parents age gq relate related imprel sex maxoccscoreparent race stateicp countyicp occmaxoccscore dadmaxoccscore mommaxoccscore occ1950_mom occ1950_pop maxrealpropparent school occ1950 occscore perwt realprop_pop realprop_mom {
	rename `v' `v'1850
}

*Attach the Eckert county crosswalk
* The crosswalk is keyed on icpsrst/icpsrcty, so the 1850 ICPSR state and
* county codes take those names first.
rename countyicp1850 icpsrcty
rename stateicp1850 icpsrst

* joinby forms every pairwise combination within a key, so one person record
* can become several rows; unmatched(both) also keeps rows found on only one side.
joinby icpsrst icpsrcty using "EGLP_1850.dta", unmatched(both)
rename _merge merge1850crosswalk

* Largest crosswalk weight observed within each 1990 NHGIS county
by nhgisst_1990 nhgiscty_1990, sort: egen maxweight = max(weight)

* County identifier built from the 1990 NHGIS codes
* (presumably yields the 5-digit state+county FIPS; the later comparisons
* against values such as 51177 are consistent with that -- confirm)
gen cty_fips = nhgisst_1990*100 + (nhgiscty_1990/10)

*Keep the counties that the VA independent cities were a part of
*https://www.bea.gov/system/files/methodologies/LAPI-Methodology.pdf
gen VAcounties = inlist(cty_fips, 51053, 51121, 51165, 51177)

*For these VA counties, overwrite the surrounding county's FIPS code with the
*code of the independent city, so that the records survive the later merge to
*the normal school / asylum county list
*   51053 -> 51730,  51121 -> 51750,  51177 -> 51630,  51165 -> 51660
#delimit ;
replace cty_fips = cond(cty_fips==51053, 51730,
                   cond(cty_fips==51121, 51750,
                   cond(cty_fips==51177, 51630, 51660)))
    if VAcounties==1;
#delimit cr

*Drop crosswalk-only rows (no person record), except for the VA counties above
drop if merge1850crosswalk==2 & !VAcounties

*Drop person records that did not match the crosswalk (per the original note,
*WV people, dropped because the sample should reflect present-day states).
*Exception: keep unmatched records in ICPSR states 45 and 49 -- per the original
*note, one LA county and one TX county.  They do not matter for the main analysis
*(not normal school or asylum counties) but they do matter for the national
*distribution, which is tabulated before the small-sample states are removed.

local unmatched merge1850crosswalk==1
local not_la_tx !inlist(icpsrst, 45, 49)

tab icpsrst if `unmatched'
tab icpsrst if `unmatched' & `not_la_tx'
drop if `unmatched' & `not_la_tx'

rename cty_fips cty_fips1850

* State FIPS = county FIPS with its last three digits removed.  This is done on
* the string form: a 4-character code has a 1-digit state, a 5-character code a
* 2-digit state; any other length leaves the state missing.
tostring cty_fips1850, generate(cty_fipstr)
gen statefip1850 = substr(cty_fipstr, 1, length(cty_fipstr) - 3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1850, replace

*Records without a county FIPS: show which ICPSR states they come from, then
*assign the state FIPS directly (ICPSR 45 -> 22 for the LA county, ICPSR 49 -> 48 for the TX county)
tab icpsrst if missing(statefip1850)

replace statefip1850 = 48 if missing(statefip1850) & icpsrst==49
replace statefip1850 = 22 if missing(statefip1850) & icpsrst==45

*Keep states that did not have schools by 1860: AL, AR, AZ, CA, CO, FL, GA, IA, ID, IN, KS,
*KY, LA, MD, ME, MN, MO, MS, MT, NC, ND, NE, NH, NM, OH, OK, OR, RI, SC, SD, TN, TX, UT, VA,
*VT, WA, WI, WV, and drop DE which did not have a normal school
* (the list below is the same set of state FIPS codes, in ascending order;
*  records with a missing state FIPS are dropped as well)

keep if inlist(statefip1850,  1,  4,  5,  6,  8, 12, 13, 16, 18, 19, ///
                             20, 21, 22, 23, 24, 27, 28, 29, 30, 31, ///
                             33, 35, 37, 38, 39, 40, 41, 44, 45, 46, ///
                             47, 48, 49, 50, 51, 53, 54, 55)
*Drop states that were not yet states on Census Day 1850
* Variables are referred to by their full names (statefip1850, age1850,
* realprop_parents1850) rather than by abbreviation, so the code keeps working
* under "set varabbrev off" and if another variable with the same prefix appears.

#delimit ;
drop if statefip1850==6|statefip1850==27|statefip1850==38|statefip1850==46|statefip1850==49|statefip1850==4|
statefip1850==8|statefip1850==16|statefip1850==30|statefip1850==31|statefip1850==32|statefip1850==35|statefip1850==40|
statefip1850==41|statefip1850==53|statefip1850==56|statefip1850==54;

#delimit cr

*Drop Georgia, RI, SC because no asylum counties

drop if statefip1850==13|statefip1850==44|statefip1850==45

* The crosswalk join can repeat a person across several rows; taghistid marks
* exactly one row per histid1850 so person-level statistics count each person once.
egen taghistid = tag(histid1850)
tab realprop_parents1850 if age1850>=16 & age1850<=18 & white1850==1 & taghistid==1

*Same distribution, leaving out the states with small samples (FIPS 12, 19, 48, 55) that aren't in
*the main 1850-1860 Census Tree regressions in Table 1.  Nothing is dropped here; the states are
*only excluded from this tabulation.
tab realprop_parents1850 if age1850>=16 & age1850<=18 & white1850==1 & taghistid==1 & statefip1850~=12 & statefip1850~=19 & statefip1850~=48 & statefip1850~=55


* Deciles of combined parental real estate, computed over one row per person
* aged 7-17; the bin is missing for every other row.
* Full variable names are used throughout: the abbreviation "school" in
* particular stops being unique as soon as school1850U is created.
xtile binrealprop_parents = realprop_parents1850 if taghistid==1 & age1850>=7 & age1850<=17, nquantiles(10)

* Attendance indicator: 1 when school1850 equals 2, otherwise 0
* (including when school1850 is missing)
gen school1850U = school1850==2

* Age-group indicators
gen teens= age1850>=14 & age1850<=17
gen young = age1850>=7 & age1850<=13

* Numeric id for each 1850 county (ICPSR state x county)
egen ctyid = group(icpsrst icpsrcty)

*Appendix Figure A2(a): 14-17 year-old school attendance by parents' combined real estate property value for White children
* (realprop_parents1850 is the sum of the father's and mother's real estate)
* Variables are referenced by their full names (age1850, white1850,
* realprop_parents1850) rather than by abbreviation.

* Attendance on parental-wealth decile dummies with 1850-county fixed effects
* absorbed and standard errors clustered by county; one row per person.
reghdfe school1850U i.binrealprop_parents if age1850>=14 & age1850<=17 & white1850==1 & taghistid==1, absorb(ctyid) cluster(ctyid) nocons
tab ctyid if e(sample)==1, sort

* Linear prediction, its standard error, and a 95% confidence interval based
* on the t distribution with e(df_r) degrees of freedom
predict predwh1850ctyfe
predict predsewh1850ctyfe, stdp
gen lbciwh1850ctyfe = predwh1850ctyfe + (invt(`e(df_r)', .025)*predsewh1850ctyfe)
gen ubciwh1850ctyfe = predwh1850ctyfe + (invt(`e(df_r)', .975)*predsewh1850ctyfe)

* Median parental real estate among White teens in each decile (x-axis
* position), copied to every row of the decile
bysort binrealprop_parents teens white1850: egen medrealprop_parentsbinwh = median(realprop_parents1850) if teens==1 & white1850==1
bysort binrealprop_parents: egen medrealprop_parentsbinwhU = max(medrealprop_parentsbinwh)

* Collapse to one row per decile, plot, then return to the person-level data
preserve
collapse medrealprop_parentsbinwhU predwh1850ctyfe predsewh1850ctyfe lbciwh1850ctyfe ubciwh1850ctyfe, by(binrealprop_parents)

#delimit ;
twoway(scatter predwh1850ctyfe medrealprop_parentsbinwhU)(rcap lbciwh1850ctyfe ubciwh1850ctyfe medrealprop_parentsbinwhU), graphregion(fcolor(white)) 
xtitle("Parents' Real Estate Value") ytitle("School Attendance") legend(off);
graph save "teens1850_wh.gph", replace;
graph export "teens1850_wh.pdf", replace;

restore;

drop predwh1850ctyfe predsewh1850ctyfe lbciwh1850ctyfe ubciwh1850ctyfe medrealprop_parentsbinwh medrealprop_parentsbinwhU;

#delimit cr

*Appendix Figure A2(b): 7-13 year-old school attendance by parents' combined real estate property value for White children
* (realprop_parents1850 is the sum of the father's and mother's real estate)
* Variables are referenced by their full names (age1850, white1850,
* realprop_parents1850) rather than by abbreviation.

* Attendance on parental-wealth decile dummies with 1850-county fixed effects
* absorbed and standard errors clustered by county; one row per person.
reghdfe school1850U i.binrealprop_parents if age1850>=7 & age1850<=13 & white1850==1 & taghistid==1, absorb(ctyid) cluster(ctyid) nocons
tab ctyid if e(sample)==1, sort

* Linear prediction, its standard error, and a 95% confidence interval based
* on the t distribution with e(df_r) degrees of freedom
predict predwh1850ctyfe
predict predsewh1850ctyfe, stdp
gen lbciwh1850ctyfe = predwh1850ctyfe + (invt(`e(df_r)', .025)*predsewh1850ctyfe)
gen ubciwh1850ctyfe = predwh1850ctyfe + (invt(`e(df_r)', .975)*predsewh1850ctyfe)

* Median parental real estate among White 7-13 year-olds in each decile
* (x-axis position), copied to every row of the decile
bysort binrealprop_parents young white1850: egen medrealprop_parentsbinwh = median(realprop_parents1850) if young==1 & white1850==1
bysort binrealprop_parents: egen medrealprop_parentsbinwhU = max(medrealprop_parentsbinwh)


* Collapse to one row per decile, plot, then return to the person-level data
preserve
collapse medrealprop_parentsbinwhU  predwh1850ctyfe predsewh1850ctyfe lbciwh1850ctyfe ubciwh1850ctyfe, by(binrealprop_parents)

#delimit ;
twoway(scatter predwh1850ctyfe medrealprop_parentsbinwhU)(rcap lbciwh1850ctyfe ubciwh1850ctyfe medrealprop_parentsbinwhU), graphregion(fcolor(white)) 
xtitle("Parents' Real Estate Value") ytitle("School Attendance") legend(off);
graph save "young1850_wh.gph", replace;
graph export "young1850_wh.pdf", replace;

restore;

* Remove the figure's working variables, as is done after Figure A2(a) ;
drop predwh1850ctyfe predsewh1850ctyfe lbciwh1850ctyfe ubciwh1850ctyfe medrealprop_parentsbinwh medrealprop_parentsbinwhU;

#delimit cr

* The county list is keyed on cty_fips, so recreate that name from cty_fips1850
gen cty_fips = cty_fips1850

*Table 1, columns 1 and 2: Differences in School Enrollment between Normal School and Asylum Counties, 1850

merge m:1 cty_fips using "justnormasylum.dta"
rename _merge mergenormasylum

*Keep normal school and asylum counties
keep if mergenormasylum==3

* Parental real estate group: 1 = at most 150, 2 = above 150.
* Stata treats missing as larger than any number, so the ">150" test needs an
* explicit non-missing guard; without it, rows with missing parental real
* estate would be placed in group 2.  Such rows now keep pargroup2 missing.
gen pargroup2 = 1  if realprop_parents1850<=150
replace pargroup2 = 2 if realprop_parents1850>150 & !missing(realprop_parents1850)



*Mobility by Parental Real Estate
* For each parental real-estate group the loop builds two county-level samples
* (ages 14-17, then ages 7-13), regresses weighted school attendance on the
* normal-school indicator with state fixed effects, appends the estimate to
* censustree18501860.xls and runs a permutation test stratified by state.
* The range 1(1)1 means only group 1 (pargroup2==1) is processed.
* Each half works on a preserved copy of the data and restores it afterwards.

forvalues i = 1(1)1{
preserve

* constant starts at 1 per row; after the weighting step below it equals the
* row's weight, so its county sum is the weighted number of records.
gen constant = 1
keep if pargroup2==`i'
drop if realprop_parents1850==.

*Table 1, column 2: 14-17 year-olds
keep if age1850>=14 & age1850<=17

count
* Replace school1850U and constant with their values multiplied by weight
* (weight presumably comes from the county crosswalk -- confirm).
#delimit ;
local vars school1850U constant;
foreach x of local vars{;
gen `x'wt = `x'*weight;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum)  constant  (mean)statefip1850 hasnormalschool  meanweight=weight maxweight school1850U, by(cty_fips1850 white1850);

#delimit cr
* Result: one row per county x white1850.  constant = sum of weights.
* NOTE(review): school1850U is collapsed with (mean) after being multiplied by
* weight, i.e. it is the simple average of attendance*weight, not
* sum(attendance*weight)/sum(weight) -- confirm this is the intended measure.

* Diagnostics: county sample sizes and state-by-county-type counts, by race
su constant if white1850==1, d

tab cty_fips1850 hasnormalschool if constant<=10 & white1850==1
tab statefip hasnormalschool if white1850==1 

tab cty_fips1850 hasnormalschool if constant<=10 & white1850==0
tab statefip hasnormalschool if white1850==0 

drop if white1850==0

* Within each state x county type: number of counties, number with a rounded
* weighted count below 10, and that share; maxfracbelow10 is the larger of the
* state's shares across its county types.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10 in Table 1, columns 3-5

drop if maxfracbelow10>=.5
drop if statefip1850==12|statefip1850==19|statefip1850==48|statefip1850==55

* Fix the row order and the random-number seed so the permutation test is
* reproducible.  The seed is set only here; the 7-13 permutation further down
* continues the same random-number stream and therefore depends on this half
* having run first.
sort cty_fips
set seed 762349

local vars school1850U   
foreach x of local vars{

* County-level regression with state fixed effects and robust standard errors;
* the control-group (hasnormalschool==0) mean is stored and reported as dvmean.
reghdfe `x' hasnormalschool if white1850==1, absorb(statefip1850) vce(robust)
su `x' if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

* Permutation test: shuffle hasnormalschool within state 1000 times
#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1850) reps(1000): reg `x' hasnormalschool i.statefip1850 if white1850==1;

#delimit cr
}
restore

*Table 1, Column 1: 7-13 year-olds
* Same steps as the 14-17 half above, applied to ages 7-13.
preserve
gen constant = 1
keep if pargroup2==`i'
drop if realprop_parents1850==.

keep if age1850>=7 & age1850<=13

count
* Replace school1850U and constant with their values multiplied by weight
#delimit ;
local vars school1850U constant;
foreach x of local vars{;
gen `x'wt = `x'*weight;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum)  constant  (mean)statefip1850 hasnormalschool meanweight=weight maxweight school1850U, by(cty_fips1850 white1850);

#delimit cr
* NOTE(review): as in the 14-17 half, school1850U here is the simple mean of
* attendance*weight within county -- confirm this is intended.

su constant if white1850==1, d

tab cty_fips1850 hasnormalschool if constant<=10 & white1850==1
tab statefip hasnormalschool if white1850==1 

tab cty_fips1850 hasnormalschool if constant<=10 & white1850==0
tab statefip hasnormalschool if white1850==0 

drop if white1850==0

gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10 in Table 1, columns 3-5
* NOTE(review): this drop is commented out for the 7-13 sample although it is
* active for the 14-17 sample above; only the fixed list of four states is
* removed here -- confirm the difference is deliberate.

*drop if maxfracbelow10>=.5
drop if statefip1850==12|statefip1850==19|statefip1850==48|statefip1850==55

sort cty_fips

local vars school1850U   
foreach x of local vars{

reghdfe `x' hasnormalschool if white1850==1, absorb(statefip1850) vce(robust)
su `x' if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1850 if e(sample)==1
outreg2 using censustree18501860.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

* Permutation test: shuffle hasnormalschool within state 1000 times
#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1850) reps(1000): reg `x' hasnormalschool i.statefip1850 if white1850==1;

#delimit cr
}
restore
}
